home *** CD-ROM | disk | FTP | other *** search
Wrap
(***************************************************

Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/

[Infos]
Authors=Hubert Kosior
Title=All Movie Guide
Description=All Movie Guide (US) import
Site=http://allmovie.com
Language=EN
Version=
Requires=3.5.0
Comments=send bugs and reports to: hubert@tm1.net|a bug corrected by Antoine Potten|to do:| - producer's name instad of producing company| - display movie categories when movie list hit (after searching)
License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
GetInfo=1

[Options]

***************************************************)

program AllMovie;

var
  MovieName: string; // title typed by the user; also reused when asking to refine the search

// ---------------------------------------------------------------------------
// Simple string procedures
// ---------------------------------------------------------------------------

// Applies StringReplace(S, Old, New) repeatedly until S no longer contains
// Old, and returns the result.
// If New itself contains Old the loop could never finish (every pass would
// re-create a match), so in that case a single StringReplace pass is applied.
function StringReplaceAll(S, Old, New: string): string;
begin
  if Pos(Old, New) > 0 then
    S := StringReplace(S, Old, New)
  else
    while Pos(Old, S) > 0 do
      S := StringReplace(S, Old, New);
  Result := S;
end;

// Removes everything up to and including the first occurrence of Pattern.
// Str is left untouched when Pattern does not occur in it.
procedure CutAfter(var Str: string; Pattern: string);
var
  Found: Integer;
begin
  Found := Pos(Pattern, Str);
  if Found > 0 then
    Str := Copy(Str, Found + Length(Pattern), Length(Str));
end;

// Removes everything before the first occurrence of Pattern (Pattern itself
// is kept). Str is left untouched when Pattern does not occur in it.
procedure CutBefore(var Str: string; Pattern: string);
var
  Found: Integer;
begin
  Found := Pos(Pattern, Str);
  if Found > 0 then
    Str := Copy(Str, Found, Length(Str));
end;

// ---------------------------------------------------------------------------
// Extracts single movie detail (like director, genre) from page
//
//   Page     - HTML text to search (passed by value, the caller's copy is
//              not modified)
//   StartTag - recognition tag; if it is absent the detail is assumed to be
//              missing and '' is returned
//   CutTag   - optional; when not empty, extraction starts right after the
//              first CutTag that follows StartTag. If it is given but cannot
//              be found, '' is returned
//   EndTag   - extraction stops right before the first EndTag; if EndTag is
//              not found the result is ''
//
// The extracted text is passed through HTMLRemoveTags and HTMLDecode.
// ---------------------------------------------------------------------------
function GetStringFromHTML(Page, StartTag, CutTag, EndTag: string): string;
begin
  Result := '';
  if Pos(StartTag, Page) > 0 then
  begin
    CutBefore(Page, StartTag);
    // optional cut tag helps finding right string in html page
    if (Length(CutTag) = 0) or (Pos(CutTag, Page) > 0) then
    begin
      if Length(CutTag) > 0 then
        CutAfter(Page, CutTag);
      // movie detail copied with html tags up to end string
      Result := Copy(Page, 1, Pos(EndTag, Page) - 1);
      // remove html tags and decode html string
      HTMLRemoveTags(Result);
      HTMLDecode(Result);
      // ShowMessage('DEBUG: GetStringFromHTML - StartTag "'+StartTag+'", CutTag "'+CutTag+'", EndTag "'+EndTag+'", Result "'+Result+'" ___ '+Page);
    end;
  end;
end;

// ---------------------------------------------------------------------------
// Adds movie titles from search results to tree
// ---------------------------------------------------------------------------
procedure AddMoviesTitles(ResultsPage: TStringList);
var
  Page: string;
  MovieTitle, MovieAddress: string;
begin
  Page := ResultsPage.Text;
  // Every movie entry begins with string '<A HREF="/cg/avg.dll?'
  while Pos('<A HREF="/cg/avg.dll?', Page) > 0 do
  begin
    CutBefore(Page, '<A HREF="/cg/avg.dll?');
    MovieAddress := 'http://allmovie.com' + GetStringFromHTML(Page, '<A', '"', '">');
    MovieTitle := GetStringFromHTML(Page, '<A', '', '</tr>');
    MovieTitle := StringReplace(MovieTitle, ')', '), ');
    // add movie to list
    PickTreeAdd(MovieTitle, MovieAddress);
    // Step past this table row. When the row is not terminated there is
    // nothing left to parse; emptying Page guarantees the loop ends.
    if Pos('</tr>', Page) > 0 then
      CutAfter(Page, '</tr>')
    else
      Page := '';
  end;
end;

// ---------------------------------------------------------------------------
// Extracts movie details from page and stores them in the current movie's
// fields (title, year, country, length, rating, director, category,
// producer, picture, description, actors).
// ---------------------------------------------------------------------------
procedure AnalyzeMoviePage(MoviePage: TStringList);
var
  Page: string;
  Value: string;
  TitleTag: string;     // '<B>title</B>' anchor that precedes year/country/length
  Description: string;  // plot + review + awards, assembled before being stored
begin
  Page := MoviePage.Text;

  // Original title
  SetField(fieldOriginalTitle, GetStringFromHTML(Page, '<TITLE>', ': ', '</TITLE>'));
  TitleTag := '<B>' + GetField(fieldOriginalTitle) + '</B>';
  // Year
  SetField(fieldYear, GetStringFromHTML(Page, TitleTag, '</TR>', '</B>'));
  // Country
  SetField(fieldCountry, GetStringFromHTML(Page, TitleTag, '<I>', '</I>'));
  // Length
  SetField(fieldLength, GetStringFromHTML(Page, TitleTag, '</I> - ', ' min'));
  // AKA -> translated title
  SetField(fieldTranslatedTitle, GetStringFromHTML(Page, '>AKA', '</TD>', '</td>'));
  // Rating (multiplied by 2, because 0 <= AMG rating <= 5)
  // NOTE(review): StrToFloat presumably fails on text that is not a number
  // in the current locale - confirm the site always delivers a plain number.
  Value := GetStringFromHTML(Page, '>AMG Rating', 'alt="', ' Stars');
  if Length(Value) > 0 then
    SetField(fieldRating, FloatToStr(StrToFloat(Value) * 2));
  // Director
  SetField(fieldDirector, GetStringFromHTML(Page, '>Director', '</TD>', '</td>'));
  // Genre -> category
  SetField(fieldCategory, GetStringFromHTML(Page, '>Genre/Type', '</TD>', '</td>'));
  // Producing company -> producer
  SetField(fieldProducer, GetStringFromHTML(Page, '>Produced by', '</TD>', '</TD>'));
  // Image
  Value := GetStringFromHTML(Page, 'http://image.allmusic.com', '', '"');
  if Length(Value) > 0 then
    GetPicture(Value);

  // The description is assembled locally and written once, so that review
  // and awards are never appended to a description left over from before
  // the import.
  Description := '';

  // Plot synopsis -> description
  Value := GetStringFromHTML(Page, '<A Name="PLOT">', '</table>', '</table>');
  if Length(Value) > 0 then
    Description := 'PLOT SYNOPSIS:' + #13#10 + Value + #13#10;

  // Review -> description
  Value := GetStringFromHTML(Page, '<A Name="REVIEW">', '</table>', '</table>');
  if Length(Value) > 0 then
    Description := Description + 'AMG REVIEW:' + #13#10 + Value + #13#10;

  // Awards -> description
  // adjust spaces and line feeds
  Value := StringReplaceAll(Page, '> <FONT', '');                              // space before title
  Value := StringReplaceAll(Value, '</FONT> </td><td WIDTH=209>', ' - ');      // minus before name
  Value := StringReplaceAll(Value, ' </A></FONT></td>', ' - ');                // minus after name (1)
  Value := StringReplaceAll(Value, ' </FONT></td>', ' - ');                    // minus after name (2)
  Value := StringReplaceAll(Value, '</FONT> </td></tr>', #13#10);              // newline after academy name
  Value := GetStringFromHTML(Value, '<A Name="AWRD">', '</td></tr>', '</TABLE>');
  // NOTE(review): the copy of this script under review replaced a single
  // space by a single space here, which can never terminate. Collapsing
  // double spaces is assumed to be the intent - confirm against upstream.
  Value := StringReplaceAll(Value, '  ', ' ');
  Value := StringReplaceAll(Value, ' - - ', ' - ');
  if Length(Value) > 0 then
    Description := Description + 'AWARDS:' + #13#10 + Value;

  // remove trailing newline from description
  if Copy(Description, Length(Description) - 1, 2) = #13#10 then
    Description := Copy(Description, 1, Length(Description) - 2);
  if Length(Description) > 0 then
    SetField(fieldDescription, Description);

  // Cast -> actors
  // adjust semicolons
  Value := StringReplaceAll(Page, '</I></TD></TR>', '; ');
  Value := GetStringFromHTML(Value, '<A Name="CAST">', '</td></tr>', '</TABLE>');
  if Length(Value) > 0 then
  begin
    // remove double spaces if only actor name given
    // (searching for a single space here would eat one letter of every word)
    while Pos('  ', Value) > 0 do
      Delete(Value, Pos('  ', Value), 2);
    // remove trailing "; "
    if Copy(Value, Length(Value) - 1, 2) = '; ' then
      Value := Copy(Value, 1, Length(Value) - 2);
    SetField(fieldActors, Value);
  end;
end;

// ---------------------------------------------------------------------------
// Loads and analyses page from internet (list of movies or direct hit)
//
// Three kinds of answer are recognised by marker texts in the page:
//   - a list of matching titles  -> the user picks one, which is analysed
//   - "too many matches"         -> the user is asked for another title
//   - anything else              -> treated as the page of a single movie
// ---------------------------------------------------------------------------
procedure AnalyzePage(Address: string);
var
  Page: TStringList;
  Content: string;
begin
  Page := TStringList.Create;
  Page.Text := GetPage(Address);
  Content := Page.Text;

  // movie list
  if Pos('movie titles like: ', Content) > 0 then
  begin
    PickTreeClear;
    PickTreeAdd('Search results', '');
    AddMoviesTitles(Page);
    if PickTreeExec(Address) then
      AnalyzePage(Address);
  end else
  // refine search
  if Pos('Sorry, there is too many possible matches, please adjust your search.', Content) > 0 then
  begin
    ShowMessage('Sorry, there is too many possible matches, please adjust your search.');
    if Input('All Movie Import', 'Enter the title of the movie:', MovieName) then
      AnalyzePage('http://allmovie.com/cg/avg.dll?p=avg&type=2&srch=' + URLEncode(MovieName));
  end else
  // direct hit
  begin
    SetField(fieldURL, Address);
    AnalyzeMoviePage(Page);
  end;

  // the list is owned by this call and must be released here
  Page.Free;
end;

// ---------------------------------------------------------------------------
// Prepares a title for the search form:
//   1. strips one leading article ("Les ", "Une ", "The ", "Le ", "Un ",
//      "L'", "L ", "A "), compared case-insensitively
//   2. drops every character that is not a letter a-z/A-Z, a digit or a space
// ---------------------------------------------------------------------------
procedure RemovePronoun(var Str: string);
var
  i: Integer;
  s: string;
  c: char;
begin
  // remove pronouns: longest prefixes are tested first
  s := UpperCase(Copy(Str, 1, 4));
  if (s = 'LES ') or (s = 'UNE ') or (s = 'THE ') then
    Str := Copy(Str, 5, Length(Str) - 4)
  else
  begin
    s := Copy(s, 1, 3);
    if (s = 'LE ') or (s = 'UN ') then
      Str := Copy(Str, 4, Length(Str) - 3)
    else
    begin
      s := Copy(s, 1, 2);
      if (s = 'L''') or (s = 'L ') or (s = 'A ') then
        Str := Copy(Str, 3, Length(Str) - 2);
    end;
  end;

  // keep letters, digits and spaces only
  s := '';
  for i := 1 to Length(Str) do
  begin
    c := StrGet(Str, i);
    if ((c >= 'a') and (c <= 'z')) or
       ((c >= 'A') and (c <= 'Z')) or
       ((c >= '0') and (c <= '9')) or
       (c = ' ') then
      s := s + Copy(Str, i, 1);
  end;
  Str := s;
end;

// ---------------------------------------------------------------------------
// Entry point: asks for a title (or a direct allmovie.com address) and
// starts the import.
// ---------------------------------------------------------------------------
begin
  if CheckVersion(3,5,0) then
  begin
    MovieName := GetField(fieldOriginalTitle);
    if MovieName = '' then
      MovieName := GetField(fieldTranslatedTitle);
    if Input('All Movie Import', 'Enter the title of the movie (only letters, digits and spaces):', MovieName) then
    begin
      // an address of the site itself is loaded as-is instead of searched for
      if Pos('allmovie.com', MovieName) > 0 then
        AnalyzePage(MovieName)
      else
      begin
        RemovePronoun(MovieName);
        AnalyzePage('http://allmovie.com/cg/avg.dll?p=avg&type=2&srch=' + StringReplace(URLEncode(MovieName), '%20', '+'));
      end;
    end;
  end else
    ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.0)');
end.